{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "<img src='http://hilpisch.com/taim_logo.png' width=\"350px\" align=\"right\">"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Artificial Intelligence in Finance"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Neural Network Classes"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Dr Yves J Hilpisch | The AI Machine\n",
    "\n",
    "http://aimachine.io | http://twitter.com/dyjh"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Activation Functions"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import math\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "from pylab import plt, mpl\n",
    "plt.style.use('seaborn')\n",
    "mpl.rcParams['savefig.dpi'] = 300\n",
    "mpl.rcParams['font.family'] = 'serif'\n",
    "np.set_printoptions(suppress=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def activation(x, act='linear', deriv=False):\n",
    "    if act == 'sigmoid':\n",
    "        if deriv:\n",
    "            out = activation(x, 'sigmoid', False)\n",
    "            return out * (1 - out)\n",
    "        return 1 / (1 + np.exp(-x))\n",
    "    elif act == 'relu':\n",
    "        if deriv:\n",
    "            return np.where(x > 0, 1, 0)\n",
    "        return np.maximum(x, 0)\n",
    "    elif act == 'softplus':\n",
    "        if deriv:\n",
    "            return activation(x, act='sigmoid')\n",
    "        return np.log(1 + np.exp(x))\n",
    "    elif act == 'linear':\n",
    "        if deriv:\n",
    "            return 1\n",
    "        return x\n",
    "    else:\n",
    "        raise ValueError('Activation function not known.')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "x = np.linspace(-1, 1, 20)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "activation(x, 'sigmoid')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "activation(x, 'sigmoid', True)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Learning &mdash; Simple Neural Network"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "class sinn:\n",
    "    def __init__(self, act='linear', lr=0.01, steps=100,\n",
    "                 verbose=False, psteps=200):\n",
    "        self.act = act\n",
    "        self.lr = lr\n",
    "        self.steps = steps\n",
    "        self.verbose = verbose\n",
    "        self.psteps = psteps\n",
    "    def forward(self):\n",
    "        ''' Forward propagation.\n",
    "        '''\n",
    "        self.l2 = activation(np.dot(self.l0, self.w), self.act)\n",
    "    def backward(self):\n",
    "        ''' Backwards propagation.\n",
    "        '''\n",
    "        self.e = self.l2 - self.y\n",
    "        d = self.e * activation(self.l2, self.act, True)\n",
    "        u = self.lr * np.dot(self.l0.T, d)\n",
    "        self.w -= u\n",
    "    def metrics(self, s):\n",
    "        ''' Performance metrics.\n",
    "        '''\n",
    "        mse = (self.e ** 2).mean()\n",
    "        acc = float(sum(self.l2.round() == self.y) / len(self.y))\n",
    "        self.res = self.res.append(\n",
    "            pd.DataFrame({'mse': mse, 'acc': acc}, index=[s,])\n",
    "        )\n",
    "        if s % self.psteps == 0 and self.verbose:\n",
    "                print(f'step={s:5d} | mse={mse:.6f}')\n",
    "                print(f'           | acc={acc:.6f}')\n",
    "    def fit(self, l0, y, steps=None, seed=None):\n",
    "        ''' Fitting step.\n",
    "        '''\n",
    "        self.l0 = l0\n",
    "        self.y = y\n",
    "        if steps is None:\n",
    "            steps = self.steps\n",
    "        self.res = pd.DataFrame()\n",
    "        samples, features = l0.shape\n",
    "        if seed is not None:\n",
    "            np.random.seed(seed)\n",
    "        self.w = np.random.random((features, 1))\n",
    "        for s in range(1, steps + 1):\n",
    "            self.forward()\n",
    "            self.backward()\n",
    "            self.metrics(s)\n",
    "    def predict(self, X):\n",
    "        ''' Prediction step.\n",
    "        '''\n",
    "        return activation(np.dot(X, self.w), self.act)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Estimation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "features = 5\n",
    "samples = 5"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "np.random.seed(10)\n",
    "l0 = np.random.standard_normal((samples, features))\n",
    "l0"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "np.linalg.matrix_rank(l0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "y = np.random.random((samples, 1))\n",
    "y"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "reg = np.linalg.lstsq(l0, y, rcond=-1)[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "reg"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "np.allclose(np.dot(l0, reg), y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "model = sinn(lr=0.015, act='linear', steps=6000,\n",
    "            verbose=True, psteps=1000)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%time model.fit(l0, y, seed=100)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "model.predict(l0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "model.predict(l0) - y"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Classification"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "features = 5\n",
    "samples = 10"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "np.random.seed(3)\n",
    "l0 = np.random.randint(0, 2, (samples, features))\n",
    "l0"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "np.linalg.matrix_rank(l0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "y = np.random.randint(0, 2, (samples, 1))\n",
    "y"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "model = sinn(lr=0.01, act='sigmoid')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%time model.fit(l0, y, 4000)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "model.l2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "model.predict(l0).round() == y"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "ax = model.res['acc'].plot(figsize=(10, 6),\n",
    "            title='Prediction Accuracy | Classification')\n",
    "ax.set(xlabel='steps', ylabel='accuracy');"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Learning &mdash; One Hidden Layer"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Shallow neural network = ONE hidden layer = not DEEP neural network."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "class shnn:\n",
    "    def __init__(self, units=12, act='linear', lr=0.01, steps=100,\n",
    "                 verbose=False, psteps=200, seed=None):\n",
    "        self.units = units\n",
    "        self.act = act\n",
    "        self.lr = lr\n",
    "        self.steps = steps\n",
    "        self.verbose = verbose\n",
    "        self.psteps = psteps\n",
    "        self.seed = seed\n",
    "    def initialize(self):\n",
    "        ''' Initializes the random weights.\n",
    "        '''\n",
    "        if self.seed is not None:\n",
    "            np.random.seed(self.seed)\n",
    "        samples, features = self.l0.shape\n",
    "        self.w0 = np.random.random((features, self.units))\n",
    "        self.w1 = np.random.random((self.units, 1))\n",
    "    def forward(self):\n",
    "        ''' Forward propagation.\n",
    "        '''\n",
    "        self.l1 = activation(np.dot(self.l0, self.w0), self.act)\n",
    "        self.l2 = activation(np.dot(self.l1, self.w1), self.act)\n",
    "    def backward(self):\n",
    "        ''' Backward propagation.\n",
    "        '''\n",
    "        self.e = self.l2 - self.y\n",
    "        d2 = self.e * activation(self.l2, self.act, True)\n",
    "        u2 = self.lr * np.dot(self.l1.T, d2)\n",
    "        self.w1 -= u2\n",
    "        e1 = np.dot(d2, self.w1.T)\n",
    "        d1 = e1 * activation(self.l1, self.act, True)\n",
    "        u1 = self.lr * np.dot(self.l0.T, d1)\n",
    "        self.w0 -= u1\n",
    "    def metrics(self, s):\n",
    "        ''' Performance metrics.\n",
    "        '''\n",
    "        mse = (self.e ** 2).mean()\n",
    "        acc = float(sum(self.l2.round() == self.y) / len(self.y))\n",
    "        self.res = self.res.append(\n",
    "            pd.DataFrame({'mse': mse, 'acc': acc}, index=[s,])\n",
    "        )\n",
    "        if s % self.psteps == 0 and self.verbose:\n",
    "                print(f'step={s:5d} | mse={mse:.5f}')\n",
    "                print(f'           | acc={acc:.5f}')\n",
    "    def fit(self, l0, y, steps=None):\n",
    "        ''' Fitting step.\n",
    "        '''\n",
    "        self.l0 = l0\n",
    "        self.y = y\n",
    "        if steps is None:\n",
    "            steps = self.steps\n",
    "        self.res = pd.DataFrame()\n",
    "        self.initialize()\n",
    "        self.forward()\n",
    "        for s in range(1, steps + 1):\n",
    "            self.backward()\n",
    "            self.forward()\n",
    "            self.metrics(s)\n",
    "    def predict(self, X):\n",
    "        ''' Prediction step.\n",
    "        '''\n",
    "        l1 = activation(np.dot(X, self.w0), self.act)\n",
    "        l2 = activation(np.dot(l1, self.w1), self.act)\n",
    "        return l2"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Estimation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "features = 5\n",
    "samples = 10"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "l0 = np.random.standard_normal((samples, features))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "np.linalg.matrix_rank(l0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "y = np.random.random((samples, 1))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "reg = np.linalg.lstsq(l0, y, rcond=-1)[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "(np.dot(l0, reg)  - y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "((np.dot(l0, reg)  - y) ** 2).mean()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "model = shnn(lr=0.01, units=16, act='softplus',\n",
    "             verbose=True, psteps=2000, seed=100)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%time model.fit(l0, y, 8000)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "model.l2 - y"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Classification"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "model = shnn(lr=0.025, act='sigmoid', steps=200,\n",
    "             verbose=True, psteps=50, seed=100)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "l0.round()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "np.linalg.matrix_rank(l0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "y.round()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "model.fit(l0.round(), y.round())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "ax = model.res.plot(figsize=(10, 6), secondary_y='mse')\n",
    "ax.get_legend().set_bbox_to_anchor((0.2, 0.5));"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Financial Data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "url = 'http://hilpisch.com/aiif_eikon_eod_data.csv'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "raw = pd.read_csv(url, index_col=0, parse_dates=True).dropna()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "sym = 'EUR='"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "data = pd.DataFrame(raw[sym])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "lags = 5\n",
    "cols = []\n",
    "data['r'] = np.log(data / data.shift(1))\n",
    "data['d'] = np.where(data['r'] > 0, 1, 0)\n",
    "for lag in range(1, lags + 1):\n",
    "    col = f'lag_{lag}'\n",
    "    data[col] = data['r'].shift(lag)\n",
    "    cols.append(col)\n",
    "data.dropna(inplace=True)\n",
    "data[cols] = (data[cols] - data[cols].mean()) / data[cols].std()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "data.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "model = shnn(lr=0.0001, act='sigmoid', steps=10000,\n",
    "             verbose=True, psteps=2000, seed=100)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "y = data['d'].values.reshape(-1, 1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%time model.fit(data[cols].values, y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "data['p'] = np.where(model.predict(data[cols]) > 0.5, 1, -1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "data['p'].value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "data['s'] = data['p'] * data['r']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# in-sample results\n",
    "data[['r', 's']].sum().apply(np.exp)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# in-sample results\n",
    "data[['r', 's']].cumsum().apply(np.exp).plot(figsize=(10, 6));"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "<img src='http://hilpisch.com/taim_logo.png' width=\"350px\" align=\"right\">"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
